Static illustration of turnstile data

library(tidyverse)
# Load the 2015 Manhattan turnstile usage records (path is relative to the
# current working directory)
turnstile <- read.csv(file = "2015_manhattan_turnstile_usage.csv")

1. Average by day of week

# Mean entry and exit volume for every (day, interval) combination
data1 <- turnstile %>%
  select(interval, day, entry_volume, exit_volume) %>%
  group_by(day, interval) %>%
  summarise(
    avg_entry = mean(entry_volume),
    avg_exit = mean(exit_volume)
  )

# Convert day and interval to factors with explicit level orders so the panels
# and bars follow these orders rather than the default sort order
data1$day <- factor(
  data1$day,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)
data1$interval <- factor(
  data1$interval,
  levels = c(
    "08PM-12AM", "04PM-08PM", "12PM-04PM",
    "08AM-12PM", "04AM-08AM", "12AM-04AM"
  )
)

# Average entries per interval, one panel per day of the week
ggplot(data = data1, mapping = aes(x = interval, y = avg_entry)) +
  geom_col(colour = "#0072B2", fill = "#66CC99") +
  labs(x = "Interval", y = "Entry Count") +
  facet_wrap(~ day) +
  coord_flip()

# Average exits per interval, one panel per day of the week
ggplot(data = data1, mapping = aes(x = interval, y = avg_exit)) +
  geom_col(colour = "#0072B2", fill = "#E69F00") +
  labs(x = "Interval", y = "Exit Count") +
  facet_wrap(~ day) +
  coord_flip()

2-1. Average by all

# Mean entry and exit volume per interval across the whole data set
data2_1 <- turnstile %>%
  select(interval, entry_volume, exit_volume) %>%
  group_by(interval) %>%
  summarise(
    avg_entry = mean(entry_volume),
    avg_exit = mean(exit_volume)
  )

# Fix the interval order used on the axis (listed latest-first)
data2_1$interval <- factor(
  data2_1$interval,
  levels = c(
    "08PM-12AM", "04PM-08PM", "12PM-04PM",
    "08AM-12PM", "04AM-08AM", "12AM-04AM"
  )
)

# Average entries per interval
ggplot(data = data2_1, mapping = aes(x = interval, y = avg_entry)) +
  geom_col(colour = "#0072B2", fill = "#66CC99") +
  labs(x = "Interval", y = "Entry Count") +
  coord_flip()

# Average exits per interval
ggplot(data = data2_1, mapping = aes(x = interval, y = avg_exit)) +
  geom_col(colour = "#0072B2", fill = "#E69F00") +
  labs(x = "Interval", y = "Exit Count") +
  coord_flip()

2-2. Average by weekday vs. weekend & holiday

# Mean entry and exit volume per interval, split into regular weekdays versus
# weekends and holidays.
#
# Each record is classified first and the means are then taken directly over
# the raw records. The earlier version averaged per-(day, holiday, interval)
# means a second time, which weighted tiny holiday groups the same as full
# weekday groups, and it grouped by the day-only label so the holiday flag
# never influenced the result.

# The holiday flag is stored as character on the shared data frame, as before.
turnstile$is_holiday <- as.character(turnstile$is_holiday)

data2_2 <- turnstile %>%
  select(interval, day, is_holiday, entry_volume, exit_volume) %>%
  mutate(
    # Case-insensitive comparison: the flag is "False" when the CSV column was
    # kept as text, but "FALSE" when read.csv parsed it as logical before the
    # as.character() conversion above.
    non_holiday = toupper(trimws(is_holiday)) == "FALSE",
    # "Weekday" = Monday-Friday and not a holiday; everything else (Saturday,
    # Sunday, or any holiday) is reported under the "Weekend" label.
    day = if_else(
      !(day %in% c("Saturday", "Sunday")) & non_holiday,
      "Weekday",
      "Weekend"
    )
  ) %>%
  group_by(day, interval) %>%
  summarise(
    avg_entry = mean(entry_volume),
    avg_exit = mean(exit_volume)
  ) %>%
  ungroup() %>%
  # Fix the interval order used on the axis (listed latest-first)
  mutate(
    interval = factor(
      interval,
      levels = c(
        "08PM-12AM", "04PM-08PM", "12PM-04PM",
        "08AM-12PM", "04AM-08AM", "12AM-04AM"
      )
    )
  )

# Average entries per interval, one panel per day type
ggplot(data2_2, aes(y = avg_entry, x = interval)) +
  geom_col(colour = "#0072B2", fill = "#66CC99") +
  ylab("Entry Count") +
  xlab("Interval") +
  facet_wrap(~ day) +
  coord_flip()

# Average exits per interval, one panel per day type
ggplot(data2_2, aes(y = avg_exit, x = interval)) +
  geom_col(colour = "#0072B2", fill = "#E69F00") +
  ylab("Exit Count") +
  xlab("Interval") +
  facet_wrap(~ day) +
  coord_flip()

3-1. Average by line & day of week

# Mean entry and exit volume for every (line group, day) combination
data3_1 <- turnstile %>%
  select(day, lines, entry_volume, exit_volume) %>%
  group_by(lines, day) %>%
  summarise(
    avg_entry = mean(entry_volume),
    avg_exit = mean(exit_volume)
  )

# Draw the day panels in calendar order
data3_1$day <- factor(
  data3_1$day,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)

# Average entries per line, one panel per day. reorder() ranks each line by
# its mean avg_entry across all days, so every panel shares one bar order.
# ungroup() is required because `lines` is still a grouping column.
data3_1 %>%
  ungroup() %>%
  mutate(lines = reorder(lines, avg_entry)) %>%
  ggplot(aes(y = avg_entry, x = lines)) +
  geom_col(colour = "#0072B2", fill = "#66CC99") +
  ylab("Entry Count") +
  xlab("Line") +
  facet_wrap(~ day) +
  coord_flip()

# Average exits per line, one panel per day. Bars are ranked by avg_exit, the
# quantity being plotted (previously they were ranked by avg_entry).
data3_1 %>%
  ungroup() %>%
  mutate(lines = reorder(lines, avg_exit)) %>%
  ggplot(aes(y = avg_exit, x = lines)) +
  geom_col(colour = "#0072B2", fill = "#E69F00") +
  ylab("Exit Count") +
  xlab("Line") +
  facet_wrap(~ day) +
  coord_flip()

3-2. Average by line & interval

# Mean entry and exit volume for every (line group, interval) combination
data3_2 <- turnstile %>%
  select(interval, lines, entry_volume, exit_volume) %>%
  group_by(lines, interval) %>%
  summarise(
    avg_entry = mean(entry_volume),
    avg_exit = mean(exit_volume)
  )

# Fix the order of the interval panels (listed latest-first)
data3_2$interval <- factor(
  data3_2$interval,
  levels = c(
    "08PM-12AM", "04PM-08PM", "12PM-04PM",
    "08AM-12PM", "04AM-08AM", "12AM-04AM"
  )
)

# Average entries per line, one panel per interval. reorder() ranks each line
# by its mean avg_entry across all intervals, so every panel shares one bar
# order. ungroup() is required because `lines` is still a grouping column.
data3_2 %>%
  ungroup() %>%
  mutate(lines = reorder(lines, avg_entry)) %>%
  ggplot(aes(y = avg_entry, x = lines)) +
  geom_col(colour = "#0072B2", fill = "#66CC99") +
  ylab("Entry Count") +
  xlab("Line") +
  facet_wrap(~ interval) +
  coord_flip()

# Average exits per line, one panel per interval. Bars are ranked by avg_exit,
# the quantity being plotted (previously they were ranked by avg_entry).
data3_2 %>%
  ungroup() %>%
  mutate(lines = reorder(lines, avg_exit)) %>%
  ggplot(aes(y = avg_exit, x = lines)) +
  geom_col(colour = "#0072B2", fill = "#E69F00") +
  ylab("Exit Count") +
  xlab("Line") +
  facet_wrap(~ interval) +
  coord_flip()

4. Average by station & interval

# Mean entry and exit volume for every (station, interval) combination.
# The station name is pasted together with station_id to form the grouping
# key, so rows sharing a name but not an id are kept apart.
data4_1 <- turnstile %>%
  select(interval, station, station_id, entry_volume, exit_volume) %>%
  mutate(station_unique = paste(station, station_id)) %>%
  group_by(station_unique, interval) %>%
  summarise(
    avg_entry = mean(entry_volume),
    avg_exit = mean(exit_volume)
  )

# Fix the order of the interval panels (listed latest-first)
data4_1$interval <- factor(
  data4_1$interval,
  levels = c(
    "08PM-12AM", "04PM-08PM", "12PM-04PM",
    "08AM-12PM", "04AM-08AM", "12AM-04AM"
  )
)

# Average entries per station, one panel per interval. reorder() ranks each
# station by its mean avg_entry across all intervals, so every panel shares
# one bar order. ungroup() is required because `station_unique` is still a
# grouping column.
data4_1 %>%
  ungroup() %>%
  mutate(station_unique = reorder(station_unique, avg_entry)) %>%
  ggplot(aes(y = avg_entry, x = station_unique)) +
  geom_col(colour = "#0072B2", fill = "#66CC99") +
  ylab("Entry Count") +
  xlab("Station") +
  facet_wrap(~ interval) +
  coord_flip()

# Average exits per station, one panel per interval. Bars are ranked by
# avg_exit, the quantity being plotted (previously they were ranked by
# avg_entry).
data4_1 %>%
  ungroup() %>%
  mutate(station_unique = reorder(station_unique, avg_exit)) %>%
  ggplot(aes(y = avg_exit, x = station_unique)) +
  geom_col(colour = "#0072B2", fill = "#E69F00") +
  ylab("Exit Count") +
  xlab("Station") +
  facet_wrap(~ interval) +
  coord_flip()